##Pre-processing of 5 m/o and 24 m/o mice - muscle snRNA data
This document includes the steps applied for pre-processing and clustering of the snRNA data obtained from [Petrany, et al. 2020] (https://www.nature.com/articles/s41467-020-20063-w#Sec4). The dataset matrices used to create the Seurat object were downloaded from [Synapses Repository] (https://www.synapse.org/#!Synapse:syn21676145/files/).
library(Seurat)
library(dplyr)
library(ggplot2)
Loading the matrices and calculating percent of reads that map to mitochondrial genome
#Load the 5m/o filtered matrices
months5_filtered <- Read10X_h5(filename = "5mo_filtered_feature_bc_matrix.h5", use.names = TRUE)
#Creating the R seurat object for 5 month old mice
fivemonth <- CreateSeuratObject(counts = months5_filtered, project = "A", min.cells = 3, min.features = 200)
#Checking percentage of reads that map to mitochondrial genome
fivemonth[["percent.mt"]] <- PercentageFeatureSet(fivemonth, pattern = "^MT-")
Visualizing QC metrics for the 5m/o datasts. The filtering cutoff has been set reads with features > 200 and < 3200
VlnPlot(fivemonth, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2)
#Visualizing feature-feature relationship using feature scatterplot
featurescatterplot <- FeatureScatter(fivemonth, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot(featurescatterplot)
#Filtering the reads with features < 200 and >3200
fivemonth <- subset(fivemonth, subset = nFeature_RNA > 200 & nFeature_RNA < 3200)
#Normalizing the Data
fivemonth <- NormalizeData(fivemonth)
#identifying highly variable features
fivemonth <- FindVariableFeatures(fivemonth, selection.method = "vst", nfeatures = 2000)
#identify top 10 highly variable genes
top10 <- head(VariableFeatures(fivemonth), 10)
#plot variables features
plot1variable <- VariableFeaturePlot(fivemonth)
plot2variable <- LabelPoints(plot = plot1variable, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1variable, plot2variable))
#Scaling the data
all.genes5mo <- rownames(fivemonth)
fivemonth <- ScaleData(fivemonth, features = all.genes5mo)
#performing linear pca
fivemonth <- RunPCA(fivemonth, features = VariableFeatures(object = fivemonth))
VizDimLoadings(fivemonth, dims = 1:2, reduction = "pca")
DimPlot(fivemonth, reduction = "pca")
DimHeatmap(fivemonth, dims = 1:15, cells = 500, balanced = TRUE)
Clustering cells and using UMAP for dimensionality reduction
#Clustering the cells
fivemonth <- FindNeighbors(fivemonth, dims = 1:12)
fivemonth <- FindClusters(fivemonth, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8197
## Number of edges: 279293
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8778
## Number of communities: 13
## Elapsed time: 0 seconds
#Running UMAP
fivemonth <- RunUMAP(fivemonth, dims = 1:12)
fivemonth <- RenameIdents(fivemonth, '0' = "Type IIb Myonuclei", '1' = "Type IIx Myonuclei", '2' = "Type IIb Myonuclei", '3' = "Type IIx Myonuclei", '4' = "FAPs", '5' = "Endothelial Cells", '6' = "Myotendinous Junction", '7' = "Smooth Muscle", '8' = "Satellite Cells", '9' = "Immune Cells", '10' = "Smooth Muscle", '11' = "Neuromuscular Junction", '12' = "Subcutaneous Fat")
DimPlot(fivemonth, reduction = "umap", label = TRUE)
fivemonth_muscle <- subset(fivemonth, idents = c("Type IIx Myonuclei", "Type IIb Myonuclei", "Satellite Cells", "Neuromuscular Junction", "Myotendinous Junction"))
fivemonth$CellType <- Idents(fivemonth)
Loading the matrices and calculating percent of reads that map to mitochondrial genome
# Get the data and create Seurat object
twentyfourmonth.data <- Read10X_h5("24-Month_filtered_feature_bc_matrix.h5", use.names = T)
twentyfourmonth <- CreateSeuratObject(counts = twentyfourmonth.data, project = "24-month", min.cells = 3, min.features = 200)
# Counting the percentage of reads that map to mitochondrial genome
twentyfourmonth [["percent.mt"]] <- PercentageFeatureSet(twentyfourmonth, pattern = "^MT-")
Visualizing QC metrics for the 5m/o datasts. The filtering cutoff has been set reads with features > 200 and < 3200
# Visualize QC metrics as a violin plot
VlnPlot(twentyfourmonth, features = c("nFeature_RNA", "nCount_RNA"), ncol = 3)
# Feature scatter plot
plot1 <- FeatureScatter(twentyfourmonth, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1
# Filtering cells with nFeature RNA between 200 - 3200 and percentage of mitochondrial RNA < 5
twentyfourmonth <- subset(twentyfourmonth, subset = nFeature_RNA > 200 & nFeature_RNA < 3200 & percent.mt < 5)
# Normalize data with log transformation (default setting)
twentyfourmonth <- NormalizeData(twentyfourmonth, normalization.method = "LogNormalize", scale.factor = 10000)
# Identify highly variable features across cells (feature selection), return 2,000 features per dataset
twentyfourmonth <- FindVariableFeatures(twentyfourmonth, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(twentyfourmonth), 10)
# Plot variable features with and without labels
plot1 <- VariableFeaturePlot(twentyfourmonth)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
plot1 + plot2
# Scaling the data (linear transformation required for dimensional reduction techniques like PCA)
all.genes24mo <- rownames(twentyfourmonth)
twentyfourmonth <- ScaleData(twentyfourmonth, features = all.genes24mo)
# Perform linear dimensional reduction
twentyfourmonth <- RunPCA(twentyfourmonth, features = VariableFeatures(object = twentyfourmonth))
# Examine and visualize PCA results
print(twentyfourmonth[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1
## Positive: Ebf1, Dlc1, Sox5, Sptbn1, Rhoj
## Negative: Slc8a3, Nrap, Pde4d, Nos1, Alpk3
## PC_ 2
## Positive: Shank3, Cyyr1, Adgrl4, Prkch, Ptprb
## Negative: Cacnb2, Cacna1c, Gucy1a2, Notch3, Pde3a
## PC_ 3
## Positive: Lvrn, Abca8a, Hmcn2, Dcn, Tnxb
## Negative: Kitl, Flt1, Adgrf5, Itpkb, Cyyr1
## PC_ 4
## Positive: Actn2, Agbl1, Sorbs1, Ank2, Ptpn3
## Negative: Myh4, Sorbs2, Mylk4, Pde4d, Actn3
## PC_ 5
## Positive: Thsd4, Itga8, Olfr78, Ccdc3, Myocd
## Negative: Plxdc1, Rgs5, Lin7a, Eps8, Rerg
VizDimLoadings(twentyfourmonth, dims = 1:2, reduction = "pca")
DimPlot(twentyfourmonth, reduction = "pca")
DimHeatmap(twentyfourmonth, dims = 1:15, cells = 500, balanced = TRUE)
Clustering cells and using UMAP for dimensionality reduction
# Cluster the cells
twentyfourmonth <- FindNeighbors(twentyfourmonth, dims = 1:15)
twentyfourmonth <- FindClusters(twentyfourmonth, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10137
## Number of edges: 350953
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8874
## Number of communities: 13
## Elapsed time: 1 seconds
# Look at cluster IDs of the first 5 cells
head(Idents(twentyfourmonth), 5)
## AAACCCAAGAACTCCT-1 AAACCCAAGACATAGT-1 AAACCCAAGGTGCATG-1 AAACCCAAGGTGCCAA-1
## 1 6 3 7
## AAACCCAAGTATGTAG-1
## 9
## Levels: 0 1 2 3 4 5 6 7 8 9 10 11 12
# Run non-linear dimensional reduction (UMAP/tSNE)
twentyfourmonth <- RunUMAP(twentyfourmonth, dims = 1:15)
DimPlot(twentyfourmonth, reduction = "umap", label = TRUE)
twentyfourmonth <- RenameIdents(twentyfourmonth, "0" = "Type IIb Myonuclei", "1" = "Type IIx Myonuclei", "2" = "Type IIb Myonuclei #2", "3" = "Type IIx Myonuclei #2", "4" = "FAPs", "5" = "Smooth Muscle", "6" = "Endothelial Cells", "7" = "Myotendinous Junction", "8" = "Satellite Cells", "9" = "Immune Cells", "10" = "Smooth Muscle #2", "11" = "Endothelial Cells #2", "12" = "Neuromuscular Junction", "13" = "Schwann Cells", "14" = "Adipocytes")
DimPlot(twentyfourmonth, reduction = "umap", label = TRUE)
twentyfourmonth_muscle <- subset(twentyfourmonth, idents = c("Type IIx Myonuclei", "Type IIb Myonuclei", "Satellite Cells", "Neuromuscular Junction", "Myotendinous Junction", "Type IIb Myonuclei #2", "Type IIx Myonuclei #2"))
saveRDS(fivemonth_muscle, file = "fivemonth_muscle.RDS")
saveRDS(twentyfourmonth_muscle, file = "twentyfourmonth_muscle.RDS")